Projet Chamois
1 Chargement des librairies
library(tidyverse)
library(corrplot)
library(lmerTest)
library(ade4)
library(splines)
library(car)
library(plotly)
library(DT)
library(Hmisc)2 Prealable
2.1 Import des donnees
setwd(".")
load('cham.Rdata')
datatable(cham, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )2.2 Elimination des donnees aberrantes
Les chamois observés après leur mort ou avant leur naissance sont retirés du jeu de donnees.
cham <- cham[cham$year<=cham$ydth,]
cham <- cham[cham$year>=cham$coh,]2.3 Creation des variables age et longevite (long)
cham2 <- cham %>%
summarise(cham, age= year-coh, long=ydth-coh)3 Lien fecondite annuelle et age des femelles
3.1 Représentation graphique des données
3.1.1 Représentation par classe d’age
cham_age <- cham2 %>%
group_by(age) %>%
dplyr::summarise(totnaissance= sum(fec), taillepop=n(), fecmean=totnaissance/n())
cham_age$fecmean <- round(cham_age$fecmean, 2)
plot1 <- ggplot(cham_age, aes(x=age, y=fecmean)) +
geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") +
labs(title = "Fécondité moyenne de la population en fonction de l'age",x="Age", y="Fécondité moyenne de la population") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth()
ggplotly(plot1)3.1.2 Representation sans grouper par classe d’age
3.1.2.1 Utilisation de la fonction jitter
plot2 <- ggplot(cham2, aes(x=age, y=fec)) +
geom_jitter(width = 0.55, height = 0) +
labs(title = "Fécondité annuelle en fonction de l'age",x="Age", y="Fécondité annuelle") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth()
ggplotly(plot2)3.1.2.2 Utilisation de la fonction geom_count
plot3 <- ggplot(cham2, aes(x=age, y=fec)) +
geom_count() +
labs(title = "Fécondité annuelle en fonction de l'age",x="Age", y="Fécondité annuelle")+
theme(plot.title = element_text(hjust = 0.5))+
geom_smooth(); plot33.2 Analyse statistique du lien entre fecondite annuelle et l’age des femelles
3.2.1 Modèles de régression lineaire generalise avec effets aléatoires
3.2.1.1 First
Le premier modele applique un modele glm1 qui utilise la fonction de lien binomiale et la variable “id” comme effet aléatoire pour prendre en compte le fait que les observations sont repetees sur les mêmes individus sur plusieurs annees.
glm1 <- glmer(fec ~ age + (1| id),data=cham2, family = binomial)
summary(glm1)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1212.3 1226.7 -603.2 1206.3 906
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9150 -1.0621 0.6421 0.7819 1.1219
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.3319 0.5761
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.83210 0.20527 4.054 5.04e-05 ***
## age -0.04723 0.01946 -2.426 0.0152 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.905
Anova(glm1)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## age 5.8874 1 0.01525 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm1 <- 1212/906;surdispersion_glm1## [1] 1.337748
Interpretation des coefficients:
summary(glm1)$coefficients## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.83209798 0.2052743 4.053590 5.043751e-05
## age -0.04722673 0.0194638 -2.426388 1.524995e-02
exp(summary(glm1)$coefficients[2])## [1] 0.9538711
Coeff <- ((1/exp(summary(glm1)$coefficients[2]))-1)*100Avec le modele glm1, AIC/df = 1.3 donc il n’y a pas de surdispersion observee. Il est 4.8359681% moins vraisemblable que les chamois aient un petit lorsque leur age augmente d’un an (p-value = 0.0152).
3.2.1.2 Second
On ajoute la variable “year” en effet additif ou multiplicatif car il pourrait y avoir un effet “annee” dans la tendance observee sur la fecondite annuelle.
glm2 <- glmer(fec ~ age+ year + (1| id),data=cham2, family = binomial)## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.166371 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
La variable “year” doit être centree normee pour pouvoir faire converger le modele.
year_scale<-scale(cham2$year, center=TRUE, scale= TRUE)
glm2 <- glmer(fec ~ age+ year_scale + (1| id),data=cham2, family = binomial)
summary(glm2)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + year_scale + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1212.4 1231.7 -602.2 1204.4 905
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0424 -1.0533 0.6394 0.7823 1.1856
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.327 0.5719
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.75566 0.21150 3.573 0.000353 ***
## age -0.03888 0.02023 -1.922 0.054627 .
## year_scale -0.11728 0.08562 -1.370 0.170774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age
## age -0.911
## year_scale 0.253 -0.291
glm3 <- glmer(fec ~ age*year_scale + (1| id),data=cham2, family = binomial)
summary(glm3)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age * year_scale + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1212.3 1236.4 -601.2 1202.3 904
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9099 -1.0508 0.6212 0.7703 1.2340
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.398 0.6309
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.78482 0.21836 3.594 0.000325 ***
## age -0.04011 0.02080 -1.929 0.053787 .
## year_scale 0.16827 0.21931 0.767 0.442931
## age:year_scale -0.02916 0.02058 -1.417 0.156513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age yr_scl
## age -0.909
## year_scale 0.202 -0.171
## age:yer_scl -0.100 0.048 -0.916
On n’observe pas de surdispersion pour les deux modeles testes. Que ce soit en effet mutiplicatif ou additif, la variable “year” n’a pas un effet significatif en addition à la variable “age” (p value > 0.1 en effet additif et multiplicatif). La variable “age” a un effet plus marque que la variable “year” avec une p value proche de 0.5.
3.2.1.3 Third
Test du modele avec deux effets aleatoires (variables “id” et “year”)
glm4<- glmer(fec ~ age + (1| id) + (1| year),data=cham2, family = binomial)
summary(glm4)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + (1 | id) + (1 | year)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1212.2 1231.5 -602.1 1204.2 905
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0185 -1.0136 0.6258 0.7816 1.2299
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.34280 0.5855
## year (Intercept) 0.07463 0.2732
## Number of obs: 909, groups: id, 163; year, 26
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.82199 0.21943 3.746 0.00018 ***
## age -0.04585 0.01985 -2.310 0.02089 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.867
Anova(glm4)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## age 5.3362 1 0.02089 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm4 <- 1212/905;surdispersion_glm4## [1] 1.339227
Les resultats du modele glm4 sont similaires à ceux du modele glm1 (AIC, df, p value et coefficients similaires) donc le fait d’ajouter la variable “year” comme effets aléatoires n’apportent pas plus d’explication sur la variance observee.
4 Variation de la fecondite annuelle en fonction du temps
4.1 Représentation graphique des données
4.1.1 Représentation graphique par annee
cham_ans = cham2 %>%
group_by(year) %>%
dplyr::summarise(totnaissance= sum(fec), taillepop=n(), agemoyen=mean(age)) %>%
mutate(fecperan=totnaissance/taillepop)
cham_ans$fecperan <- round(cham_ans$fecperan, 2)plot4 <- ggplot(cham_ans, aes(x=year, y=fecperan)) +
geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") +
labs(title = "Fécondité moyenne de la population en fonction des annees",x="Annees", y="Fécondité moyenne") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth(); plot4
plot5 <- ggplot(data = cham_ans, aes(x = year,y=agemoyen))+
labs(title = "Age moyen de la population en fonction des annees",x="Annees", y="Age moyen") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_point()+
geom_smooth(); plot54.1.2 Représentation graphique sans grouper par annee
plot6 <- ggplot(cham2, aes(x=year, y=fec)) +
labs(title = "Fécondité annuelle en fonction des annees",x="Annees", y="Fécondité annuelle") +
geom_jitter(width = 0.55, height = 0) +
geom_smooth()+
theme(plot.title = element_text(hjust = 0.5)); plot6
plot7 <- ggplot(cham2, aes(x=year, y=fec)) +
geom_count() +
labs(title = "Fécondité annuelle en fonction des annees",x="Annees", y="Fécondité annuelle") +
theme(plot.title = element_text(hjust = 0.5))+
geom_smooth(); plot7
plot8 <- ggplot(data = cham_ans, aes(x = year,y=agemoyen))+
labs(title = "Age moyen de la population en fonction des annees",x="Annees", y="Age moyen") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_point()+
geom_smooth(); plot84.2 Analyse statistique du lien entre fécondité annuelle et temps
4.2.1 Modèles de régression lineaire generalise avec effets aléatoires
4.2.1.1 First
Le premier modele applique un modele glm5 qui utilise la fonction de lien binomiale et la variable “id” comme effet aléatoire pour prendre en compte le fait que les observations sont repetees sur les mêmes individus sur plusieurs annees.
glm5 <- glmer(fec ~ year_scale + (1| id),data=cham2, family = binomial)
summary(glm5)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year_scale + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1214.1 1228.6 -604.1 1208.1 906
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8977 -1.0434 0.6474 0.7799 1.1928
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.3358 0.5794
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.38601 0.08734 4.42 9.89e-06 ***
## year_scale -0.16692 0.08263 -2.02 0.0434 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## year_scale -0.028
Anova(glm5)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## year_scale 4.0807 1 0.04337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
surdispersion_glm5 <- 1214/906;surdispersion_glm5## [1] 1.339956
Pas de surdispersion observee. D’apres la p-value<0.05, il y a aurait un effet significatif de la variable “year” sur la fecondite annuelle. Or, graphiquement, l’effet de la variable “year” semble negligeable. On se rend compte que l’age moyen de la population augmente avec les annees et on sait grace au modele glm1 que la variable “age” a un effet significatif sur la fecondite annuelle. Ainsi, il est important de regarder le modele glm3 qui presente les effets multiplicatids “year” et “age”.
glm3 <- glmer(fec ~ age*year_scale + (1| id),data=cham2, family = binomial)
summary(glm3)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age * year_scale + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1212.3 1236.4 -601.2 1202.3 904
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9099 -1.0508 0.6212 0.7703 1.2340
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.398 0.6309
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.78482 0.21836 3.594 0.000325 ***
## age -0.04011 0.02080 -1.929 0.053787 .
## year_scale 0.16827 0.21931 0.767 0.442931
## age:year_scale -0.02916 0.02058 -1.417 0.156513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age yr_scl
## age -0.909
## year_scale 0.202 -0.171
## age:yer_scl -0.100 0.048 -0.916
La variable “age” a un effet avec une p value proche de 0.5 alors que les p value associees à la variable “year” et aux effets multiplicatifs ne sont pas significatifs (p valu>0.1). La variable “year” n’a donc pas d’effet significatif sur la fecondite annuelle des chamois.
5 Lien entre fecondite totale et longevite des animaux
5.1 Représentation graphique des données
cham_id = cham2 %>%
group_by(id) %>%
dplyr::summarise(feconditetotale= sum(fec), long=long, pds=pds, anneetot=(ydth-min(year)+1), MinAn=min(year), MaxAn=max(year), AgePds=min(age)) %>%
unique()%>%
mutate(fectotrelative=feconditetotale/anneetot)%>%
drop_na(long)plot9 <-ggplot(cham_id, aes(x=long, y=feconditetotale)) +
geom_count() +
labs(title = "Fecondite totale en fonction de la longevite",x="Longevite", y="Fecondite totale") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth(); plot9
plot10 <- ggplot(cham_id, aes(x=long, y=feconditetotale)) +
geom_jitter(width = 0.25, height = 0.25)+
labs(title = "Fecondite totale en fonction de la longevite",x="Longevite", y="Fecondite totale") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth(); plot10
Tous les chamois n’ont pas été suivis le même nombre d’annee. Pour
prendre en compte ce biais dans la fecondite totale des chamois, la
fecondite totale a ete divisee par le nombre d’annees de suivi.
plot11 <- ggplot(data = cham_id, aes(x=long, y=fectotrelative))+
geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
labs(title = "Fecondite sur le nombre d'annees suivies en fonction de la longevite",x="Longevite", y="Fecondite relative au nombre d'annees suivies") +
geom_smooth() +
theme(plot.title = element_text(hjust = 0.5)); plot115.2 Analyse statistique du lien entre la fécondité annuelle et la longevite
5.2.1 Tests de modèles de régression lineaire generalise avec effets aléatoires
5.2.1.1 First => Voir avec Karim pour questions. Pas l’impression de pouvoir deduire de relation quand prise en compte du nombre d’annees de suivi dans la relation
test1 <- lm(feconditetotale~long, data = cham_id)
summary(test1)##
## Call:
## lm(formula = feconditetotale ~ long, data = cham_id)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2395 -1.6491 -0.3901 1.7214 8.5738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.28163 0.63538 0.443 0.658
## long 0.25904 0.05098 5.081 1.03e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.66 on 161 degrees of freedom
## Multiple R-squared: 0.1382, Adjusted R-squared: 0.1328
## F-statistic: 25.81 on 1 and 161 DF, p-value: 1.031e-06
plot(test1)hist(resid(test1))#Non homogeneite des residus
test2 <- lm(feconditetotale~log(long), data = cham_id)
summary(test2)##
## Call:
## lm(formula = feconditetotale ~ log(long), data = cham_id)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3236 -1.8056 -0.3497 1.7138 8.6503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.0910 1.2331 -2.507 0.0132 *
## log(long) 2.6837 0.5079 5.283 4.07e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.645 on 161 degrees of freedom
## Multiple R-squared: 0.1478, Adjusted R-squared: 0.1425
## F-statistic: 27.91 on 1 and 161 DF, p-value: 4.066e-07
plot(test2)#Non homogeneite des residus
test3 <- lm(sqrt(feconditetotale)~long, data = cham_id)
summary(test3)##
## Call:
## lm(formula = sqrt(feconditetotale) ~ long, data = cham_id)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.33973 -0.44205 0.02458 0.62207 1.71466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.86498 0.19205 4.504 1.28e-05 ***
## long 0.06412 0.01541 4.161 5.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8041 on 161 degrees of freedom
## Multiple R-squared: 0.09708, Adjusted R-squared: 0.09148
## F-statistic: 17.31 on 1 and 161 DF, p-value: 5.151e-05
plot(test3)#Non homogeneite des residus
cham_id2 <- cham_id[cham_id$feconditetotale!=0,]
test4 <- lm(log(feconditetotale)~(log(long)), data = cham_id2)
summary(test4)##
## Call:
## lm(formula = log(feconditetotale) ~ (log(long)), data = cham_id2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46036 -0.48276 0.06982 0.58994 1.20898
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.1012 0.3569 -3.086 0.00243 **
## log(long) 0.8863 0.1468 6.039 1.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6879 on 145 degrees of freedom
## Multiple R-squared: 0.201, Adjusted R-squared: 0.1955
## F-statistic: 36.47 on 1 and 145 DF, p-value: 1.24e-08
plot(test4)#Non homogeneite des residus
test5 <- lm(log(feconditetotale+1)~(long), data = cham_id2)
summary(test5)##
## Call:
## lm(formula = log(feconditetotale + 1) ~ (long), data = cham_id2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09484 -0.34652 -0.01657 0.41983 0.97945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.63262 0.13315 4.751 4.82e-06 ***
## long 0.06419 0.01075 5.971 1.74e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5007 on 145 degrees of freedom
## Multiple R-squared: 0.1974, Adjusted R-squared: 0.1918
## F-statistic: 35.65 on 1 and 145 DF, p-value: 1.736e-08
plot(test5)#Non homogeneite des residus
test6 <- lm(fectotrelative~long, data = cham_id)
summary(test6)##
## Call:
## lm(formula = fectotrelative ~ long, data = cham_id)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57107 -0.22551 0.01043 0.21283 0.44332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5545229 0.0694994 7.979 2.61e-13 ***
## long 0.0007195 0.0055768 0.129 0.898
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.291 on 161 degrees of freedom
## Multiple R-squared: 0.0001034, Adjusted R-squared: -0.006107
## F-statistic: 0.01664 on 1 and 161 DF, p-value: 0.8975
plot(test6)#Non homogeneite des residus
test7 <- lm(log(fectotrelative)~long, data = cham_id2)
summary(test7)##
## Call:
## lm(formula = log(fectotrelative) ~ long, data = cham_id2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.51798 -0.28228 0.07845 0.31246 0.59766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.613173 0.113211 -5.416 2.47e-07 ***
## long 0.005171 0.009140 0.566 0.572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4257 on 145 degrees of freedom
## Multiple R-squared: 0.002203, Adjusted R-squared: -0.004678
## F-statistic: 0.3201 on 1 and 145 DF, p-value: 0.5724
plot(test7)test8 <- glm(data=cham_id, feconditetotale~long, family="poisson")
summary(test8)##
## Call:
## glm(formula = feconditetotale ~ long, family = "poisson", data = cham_id)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8849 -1.1120 -0.3623 0.8639 3.3159
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.24317 0.14334 1.697 0.0898 .
## long 0.07730 0.01046 7.388 1.49e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 390.54 on 162 degrees of freedom
## Residual deviance: 335.96 on 161 degrees of freedom
## AIC: 772.39
##
## Number of Fisher Scoring iterations: 5
test9 <- glm(data=cham_id, fectotrelative~long, family="quasipoisson")
summary(test9)##
## Call:
## glm(formula = fectotrelative ~ long, family = "quasipoisson",
## data = cham_id)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.06876 -0.32640 0.01389 0.26683 0.53371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.589544 0.123663 -4.767 4.15e-06 ***
## long 0.001278 0.009907 0.129 0.898
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.1504962)
##
## Null deviance: 32.342 on 162 degrees of freedom
## Residual deviance: 32.340 on 161 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
6 Lien entre fecondite annuelle et longevite des animaux
6.1 Représentation graphique des données
plot12 <- ggplot(cham2, aes(x=long, y=fec)) +
geom_count() +
labs(title = "Fécondité annuelle en fonction de la longevite",x="Longevite", y="Fécondité annuelle")+
geom_smooth()+
theme(plot.title = element_text(hjust = 0.5)); plot12plot13 <- ggplot(cham2, aes(x=long, y=fec)) +
geom_jitter(width = 0.55, height=0) +
labs(title = "Fécondité annuelle en fonction de la longevite",x="Longevite", y="Fécondité annuelle")+
geom_smooth()+
theme(plot.title = element_text(hjust = 0.5)); plot136.2 Analyse statistique du lien entre fecondite annuelle et longevite des femelles
6.2.1 Modèles de régression lineaire generalise avec effets aléatoires
6.2.1.1 First
glm11 <- glmer(fec ~ long + (1| id),data=cham2, family = binomial)
summary(glm11)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ long + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1218.0 1232.5 -606.0 1212.0 906
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7916 -1.0634 0.6517 0.7850 1.2047
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.3199 0.5656
## Number of obs: 909, groups: id, 163
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.23422 0.29351 0.798 0.425
## long 0.01175 0.02223 0.529 0.597
##
## Correlation of Fixed Effects:
## (Intr)
## long -0.956
Anova(glm11)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## long 0.2793 1 0.5971
Pas de surdispersion. Pas d’effets significatifs observes de la variable “longevite” sur la fecondite annuelle (p value > 0.05)
6.2.1.2 Second => voir avec Karim la pertinence de segmenter en deux la variable
Graphiquement, une courbe concave se dessine donc on segmente la variable “longevite” en deux segments pour tester la relation longevite/fecondite annuelle sur les deux segments.
cham_long1 <- cham2[cham2$long<15,]
cham_long2 <- cham2[cham2$long>=15,]
plot16 <- ggplot(cham_long1, aes(x=long, y=fec)) +
geom_count() +
labs(title = "Fécondité en fonction de la longevite",x="Longevite", y="Fécondité")+
geom_smooth()+
theme(plot.title = element_text(hjust = 0.5)); plot16## Warning: Removed 408 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
glm12 <- glmer(fec ~ long + (1| id),data=cham_long1, family = binomial)
summary(glm12)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ long + (1 | id)
## Data: cham_long1
##
## AIC BIC logLik deviance df.resid
## 789.0 802.2 -391.5 783.0 585
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6864 -1.0755 0.6737 0.8230 1.1503
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.1568 0.3959
## Number of obs: 588, groups: id, 120
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.66433 0.39918 -1.664 0.09606 .
## long 0.09835 0.03665 2.684 0.00728 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## long -0.971
Anova(glm12)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## long 7.2017 1 0.007283 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot17 <- ggplot(cham_long2, aes(x=long, y=fec)) +
geom_count() +
labs(title = "Fécondité en fonction de la longevite",x="Longevite", y="Fécondité")+
geom_smooth()+
theme(plot.title = element_text(hjust = 0.5)); plot17## Warning: Removed 408 rows containing non-finite values (`stat_sum()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 408 rows containing non-finite values (`stat_smooth()`).
glm13 <- glmer(fec ~ long + (1| id),data=cham_long2, family = binomial)
summary(glm13)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ long + (1 | id)
## Data: cham_long2
##
## AIC BIC logLik deviance df.resid
## 418.9 430.3 -206.5 412.9 318
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9648 -1.0836 0.5871 0.7880 1.2014
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.3949 0.6284
## Number of obs: 321, groups: id, 43
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.35885 1.43312 3.041 0.00235 **
## long -0.23441 0.08423 -2.783 0.00539 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## long -0.994
Anova(glm13)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## long 7.7448 1 0.005387 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
7 Lien entre fecondite totale et poids
7.1 Représentation graphique des données
7.1.1 Vérification de la comparabilite des poids selon les ages de capture
plot18 <- ggplot(cham_id, aes(x=AgePds, y=pds)) +
geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
labs(title = "Poids selon l'age de capture",x="Age de capture", y="Poids mesuré")+
theme(plot.title = element_text(hjust = 0.5))+
geom_smooth();plot18On observe un impact fort de l’age sur le poids pour les ages faibles. On retire les outliers et on conserve les donnees situees dans l’intervalle de confiance à 99%.
cham_pds <- cham_id[which(cham_id$pds!="NA"),]
borninf = mean(cham_pds$pds)-3*(sd(cham_pds$pds)/sqrt(length(cham_pds$pds)))
borninf## [1] 20.0376
bornsup = mean(cham_pds$pds)+3*(sd(cham_pds$pds)/sqrt(length(cham_pds$pds)))
bornsup## [1] 22.11812
?sd
sqrt(length(cham_pds$pds))## [1] 11.44552
cham_pds <- cham_pds%>%
filter(pds<=22.11812&pds>=20.0376)
plot18 <- ggplot(cham_pds, aes(x=AgePds, y=pds)) +
geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
geom_smooth();plot187.1.2 Représentation graphique de la fecondite totale en fonction du poids
plot19 <- ggplot(cham_pds, aes(x=pds, y=feconditetotale)) +
geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
geom_smooth();plot197.1.3 Représentation graphique de la longevite en fonction du poids
plot20 <- ggplot(cham_pds, aes(x=pds, y=long)) +
geom_point(color="blue", fill=rgb(0.1,0.4,0.5,0.7)) +
geom_smooth();plot207.2 Analyse statistique du lien entre fecondite annuelle/longevite et poids des femelles
7.2.1 Modèles de régression lineaire generalise avec effets aléatoires
7.2.1.1 First
lm14 <- lm(feconditetotale ~ pds,data=cham_pds)
summary(lm14)##
## Call:
## lm(formula = feconditetotale ~ pds, data = cham_pds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4002 -2.1519 -0.6555 1.4273 9.5998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.7763 23.0128 0.903 0.374
## pds -0.8274 1.0838 -0.763 0.451
##
## Residual standard error: 3.059 on 31 degrees of freedom
## Multiple R-squared: 0.01845, Adjusted R-squared: -0.01321
## F-statistic: 0.5828 on 1 and 31 DF, p-value: 0.451
lm15 <- lm(long ~ pds,data=cham_pds)
summary(lm15)##
## Call:
## lm(formula = long ~ pds, data = cham_pds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.2055 -2.7518 -0.6201 1.9458 8.7945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -49.209 27.773 -1.772 0.0863 .
## pds 2.829 1.308 2.163 0.0384 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.692 on 31 degrees of freedom
## Multiple R-squared: 0.1311, Adjusted R-squared: 0.1031
## F-statistic: 4.679 on 1 and 31 DF, p-value: 0.03837
Impact de la longevite significatif sur le poids mais pas de la fecondite totale.